
*----------------------------------------------------------------------------
**#  Set paths
*----------------------------------------------------------------------------


global replica_dir="<add path to your working directory>"
global data_dir="$replica_dir/data"
global output_dir="$replica_dir/output"

global sned_raw_data_dir="<path to your SNED data directory>"


/*----------------------------------------------------------------------------*/

**#  Sned 2014

/*----------------------------------------------------------------------------*/

import excel "$sned_raw_data_dir/sned2014-2015/20150409_SNED_2014_2015.xlsx", sheet("Sheet1") firstrow case(lower) clear

keep rbd cluster sel indicer  rural_rbd tipo_est 

rename  cluster hg
gen sned_yr=2014

save "$data_dir/sned2014.dta", replace

/*----------------------------------------------------------------------------*/

**#  Sned 2012

/*
Note: mistake in cod_depe for 2012. Use the cod_depe that comes from SIMCE files.
----------------------------------------------------------------------------*/

import excel "$sned_raw_data_dir/sned2012-2013/20130712_SNED_2012_2013.xlsx", sheet("Hoja2") firstrow case(lower) clear

keep rbd cluster sel indicer  rural_rbd tipo_est 

rename cluster hg
gen sned_yr=2012

save "$data_dir/sned2012.dta", replace


/*----------------------------------------------------------------------------*/

**#  Sned 2010

/*----------------------------------------------------------------------------*/

import excel "$sned_raw_data_dir/sned2010-2011/proceso 2010-2011.xls", sheet("Base_SNED_2010_2011_MINEDUC_Mar") firstrow case(lower) clear

rename 	(rbd_n  depend    cod_area		cluster) /// 
		(rbd    cod_depe  rural_rbd		hg)

recode rural_rbd (2=1) (1=0) 

keep rbd hg sel indicer  rural_rbd tipo_est 

gen sned_yr=2010

save "$data_dir/sned2010.dta", replace


/*----------------------------------------------------------------------------*/

**#  Sned 2008

/*----------------------------------------------------------------------------*/

import excel "$sned_raw_data_dir/sned2008-2009/proceso 2008-2009.xls", sheet("Base_SNED2008_20Marzo_MINEDUC") firstrow case(lower) clear

tab selección
gen 	sel=1 if selección=="Sí, con derecho a percibir el 100% de la Subvención por Desempeño de Excelencia"
replace sel=2 if selección=="Sí, con derecho a percibir el 60% de la Subvención por Desempeño de Excelencia"
replace sel=3 if selección=="No"

rename 	(cod_area	tipoest 	clusters) /// 
		(rural_rbd	tipo_est 	hg)

recode rural_rbd (2=1) (1=0) 

keep rbd hg sel indicer  rural_rbd tipo_est 

gen sned_yr=2008

save "$data_dir/sned2008.dta", replace


/*----------------------------------------------------------------------------*/

**#  Sned 2006

/*----------------------------------------------------------------------------*/

import excel "$sned_raw_data_dir/sned2006-2007/proceso 2006-2007.xls", sheet("sned2006-2007") firstrow case(lower) clear
 
gen 	sel=1 if sel25_35==1
replace sel=2 if sel25_35==2
replace sel=3 if sel25_35==0

rename 	(cod_area	tipoest 	clusters) /// 
		(rural_rbd	tipo_est 	hg)

recode rural_rbd (2=1) (1=0) 

keep rbd hg sel indicer  rural_rbd tipo_est 

gen sned_yr=2006

save "$data_dir/sned2006.dta", replace


*------------------------------------------------------------------------------

**# Stack all together

*------------------------------------------------------------------------------

use 		 "$data_dir/sned2014.dta", clear
append using "$data_dir/sned2012.dta"
append using "$data_dir/sned2010.dta"
append using "$data_dir/sned2008.dta"
append using "$data_dir/sned2006.dta"

save "$data_dir/stacked_sned.dta", replace


/*----------------------------------------------------------------------------*/
 
**# Derived variables
/*	
	- Number of winner/looser schools per HG
	- Cutoff variables
	- Mean of cutoffs for each regression sample
*/


***

tab sel , gen(nrschhg_sel)

collapse (sum) nrschhg_sel1 nrschhg_sel2 nrschhg_sel3, by(hg sned_yr)

la var nrschhg_sel1 "numb schools in hg with a full bonus" 
la var nrschhg_sel2 "numb schools in hg with a partial bonus" 
la var nrschhg_sel3 "numb schools in hg with no bonus"

merge 1:m hg sned_yr using "$data_dir/stacked_sned.dta"
drop _merge

save "$data_dir/stacked_sned.dta", replace


***

gen cut25=.
gen cut10=.

foreach year in  2006 2008 2010 2012 2014 {
			
		levelsof hg
		
		local hg_lev=r(levels)
		
		foreach lev in `hg_lev' {
		di "`lev'"	

		* top 25%
		sum indicer 			if sel==1 				& hg==`lev' & sned_yr==`year'				 
		replace cut25=r(min) 	if inlist(sel, 1, 2) 	& hg==`lev' & sned_yr==`year'
		 
		* additional 10%
		sum indicer 			if sel==2 				& hg==`lev' & sned_yr==`year'			
		replace cut10=r(min) 	if inlist(sel, 2, 3) 	& hg==`lev' & sned_yr==`year'	
			
		}
}

gen st_ind25= round((indicer-cut25), 0.00001)
gen st_ind10= round((indicer-cut10), 0.00001)

save "$data_dir/stacked_sned.dta", replace


